Example Multiple Exposure Fit

Author

Tom Palmer

Published

May 18, 2023

# library(OneSampleMR)
library(ivreg)
library(gtsummary)
library(dplyr)
library(Statamarkdown)
library(haven)

Simulate data

set.seed(9)
n    <- 1000
psi0 <- 0.5
Z1   <- rbinom(n, 1, 0.5)
Z2   <- rbinom(n, 2, .3)
X1   <- rbinom(n, 1, 0.7*Z1 + 0.2*(1 - Z1))
p2   <- 0.1*Z2 + .4*Z1
summary(p2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.1000  0.4000  0.2614  0.5000  0.6000
X2   <- rbinom(n, 1, p2)
m0   <- plogis(1 + 0.8*X1 - 0.39*Z1)
Y    <- rbinom(n, 1, plogis(psi0*X1 + psi0*X2 + log(m0/(1 - m0))))
dat  <- data.frame(Z1, Z2, X1, X2, Y)
# fit <- tsps(Y ~ X1 + X2 | Z1 + Z2 , data = dat, link = "logit")
# summary(fit)

Logistic TSPS

fitx1     <- lm(X1 ~ Z1 + Z2, data = dat)
dat$x1hat <- fitted.values(fitx1)
dat$x1res <- residuals(fitx1)

fitx2     <- lm(X2 ~ Z1 + Z2, data = dat)
dat$x2hat <- fitted.values(fitx2)
dat$x2res <- residuals(fitx2)

yfit      <- glm(Y ~ x1hat + x2hat, data = dat, family = binomial)
yfit %>% tbl_regression(exp = TRUE)
Characteristic OR1 95% CI1 p-value
x1hat 2.89 0.18, 44.3 0.4
x2hat 1.83 0.10, 34.4 0.7
1 OR = Odds Ratio, CI = Confidence Interval

Logistic TSRI

yfit1     <- glm(Y ~ x1hat + x2hat + x1res + x2res, data = dat, family = binomial)
yfit1 %>% tbl_regression(exp = TRUE)
Characteristic OR1 95% CI1 p-value
x1hat 3.12 0.18, 50.6 0.4
x2hat 2.25 0.12, 45.3 0.6
x1res 4.10 2.69, 6.41 <0.001
x2res 1.94 1.18, 3.28 0.011
1 OR = Odds Ratio, CI = Confidence Interval

Linear TSPS

  • TSLS multiple exposures fit
mvtsls <- ivreg(Y ~ X1 + X2 | Z1 + Z2, data = dat)
mvtsls %>% tbl_regression()
Characteristic Beta 95% CI1 p-value
X1 0.15 -0.23, 0.54 0.4
X2 0.09 -0.32, 0.50 0.7
1 CI = Confidence Interval
  • Manual fit
yfit2 <- lm(Y ~ x1hat + x2hat, data = dat)
yfit2 %>% tbl_regression()
Characteristic Beta 95% CI1 p-value
x1hat 0.15 -0.24, 0.55 0.4
x2hat 0.09 -0.33, 0.51 0.7
1 CI = Confidence Interval

ivpoisson fit in Stata

write_dta(dat, "dat.dta")
use dat, clear
ivpoisson cfunction Y (X1 X2 = Z1 Z2), nolog irr
Final GMM criterion Q(b) =  2.53e-20

note: model is exactly identified.

Exponential mean model with endogenous regressors

Number of parameters =  11                         Number of obs  =      1,000
Number of moments    =  11
Initial weight matrix: Unadjusted
GMM weight matrix:     Robust

------------------------------------------------------------------------------
             |               Robust
           Y |        IRR   std. err.      z    P>|z|     [95% conf. interval]
-------------+----------------------------------------------------------------
Y            |
          X1 |   1.173178   .2972523     0.63   0.528     .7139897    1.927684
          X2 |   1.152812   .3064273     0.53   0.593     .6847026    1.940954
       _cons |    .727815   .0396271    -5.84   0.000     .6541478    .8097782
-------------+----------------------------------------------------------------
X1           |
          Z1 |   .4595281   .0278654    16.49   0.000     .4049129    .5141434
          Z2 |   .0213404   .0209623     1.02   0.309    -.0197449    .0624257
       _cons |   .2065138   .0225799     9.15   0.000      .162258    .2507696
-------------+----------------------------------------------------------------
X2           |
          Z1 |   .4150268   .0241722    17.17   0.000     .3676501    .4624035
          Z2 |   .1083117   .0198166     5.47   0.000     .0694718    .1471516
       _cons |  -.0048893   .0116681    -0.42   0.675    -.0277584    .0179799
-------------+----------------------------------------------------------------
       /c_X1 |   .0588318   .2574314     0.23   0.819    -.4457244     .563388
       /c_X2 |  -.0506969     .26508    -0.19   0.848    -.5702441    .4688504
------------------------------------------------------------------------------
Note: Estimates are transformed only in the first equation to incidence-rate ratios.
Note: _cons estimates baseline incidence rate.
Instrumented: X1 X2
 Instruments: Z1 Z2
  • Compare to TSPS Poisson
yfit3 <- glm(Y ~ x1hat + x2hat, data = dat, family = poisson)
yfit3 %>% tbl_regression(exp = TRUE)
Characteristic IRR1 95% CI1 p-value
x1hat 1.21 0.39, 3.83 0.7
x2hat 1.11 0.33, 3.70 0.9
1 IRR = Incidence Rate Ratio, CI = Confidence Interval
  • TSRI Gamma distribution with log link
dat <- 
  dat %>%
  mutate(Y1 = case_when(Y == 0 ~ 0.001,
                        Y == 1 ~ 1))
yfit4 <- glm(Y1 ~ X1 + X2 + x1res + x2res, data = dat, family = Gamma(link = "log"))
yfit4 %>% tbl_regression(exp = TRUE)
Characteristic exp(Beta) 95% CI1 p-value
X1 1.17 0.71, 1.94 0.5
X2 1.15 0.68, 1.97 0.6
x1res 1.06 0.64, 1.77 0.8
x2res 0.95 0.55, 1.62 0.9
1 CI = Confidence Interval